In [100]:
import pandas as pd
In [101]:
import missingno as msno

# Raw tree inventory: a semicolon-separated, UTF-8 encoded CSV.
# NOTE(review): this absolute Windows path only resolves on the machine it was
# written on; point it at your own copy of the file before running.
TREES_CSV = r'C:\Users\imane\OneDrive\Desktop\Data4good\p2-arbres-fr.csv'
df = pd.read_csv(TREES_CSV, sep=";", encoding="utf-8")

# Last expression of the cell: missingno's bar chart for the freshly loaded frame.
msno.bar(df)
Out[101]:
<AxesSubplot:>
In [102]:
# Columns that are not used anywhere in the rest of the notebook.
# They are removed from `df` in place with a single call.
unused_columns = [
    "id",
    "complement_addresse",
    "numero",
    "id_emplacement",
    "espece",
    "variete",
    "remarquable",
    "type_emplacement",
    "libelle_francais",
]
df.drop(columns=unused_columns, inplace=True)
In [103]:
# Discard rows whose circumference or height is exactly zero, or exceeds the
# cut-offs used by this analysis (470 for circonference_cm, 35 for hauteur_m).
# Comparisons against NaN are False, so rows with missing values pass through
# this step and are handled by the dropna below.
out_of_range = (
    (df["circonference_cm"] == 0)
    | (df["hauteur_m"] == 0)
    | (df["circonference_cm"] > 470)
    | (df["hauteur_m"] > 35)
)
df.drop(index=df.index[out_of_range], inplace=True)

# Remove rows that lack a circumference, a height or a development stage.
df.dropna(
    subset=["circonference_cm", "hauteur_m", "stade_developpement"],
    inplace=True,
)
In [104]:
# Scale the height column by 100 (the column is renamed to "hauteur_cm" in the
# following cell). NOTE: this cell is not idempotent -- every re-run multiplies
# the values by 100 again, so reload `df` before executing it a second time.
df["hauteur_m"] = df["hauteur_m"].mul(100)
In [89]:
# `rename` returns a new frame, so `df` keeps its original column name while
# `new_df` carries the height column under "hauteur_cm".
height_column_rename = {"hauteur_m": "hauteur_cm"}
new_df = df.rename(columns=height_column_rename)

# Display the renamed frame as the cell output.
new_df
Out[89]:
domanialite arrondissement lieu genre circonference_cm hauteur_cm stade_developpement geo_point_2d_a geo_point_2d_b
1 Jardin PARIS 7E ARRDT MAIRIE DU 7E 116 RUE DE GRENELLE PARIS 7E Taxus 65 800 A 48.857656 2.321031
2 Jardin PARIS 7E ARRDT MAIRIE DU 7E 116 RUE DE GRENELLE PARIS 7E Taxus 90 1000 A 48.857705 2.321061
3 Jardin PARIS 7E ARRDT MAIRIE DU 7E 116 RUE DE GRENELLE PARIS 7E Acer 60 800 A 48.857722 2.321006
8 Jardin PARIS 16E ARRDT JARDIN DE L AVENUE FOCH / 10 AVENUE FOCH Sophora 145 1400 A 48.871990 2.275814
9 Jardin PARIS 16E ARRDT JARDIN DE L AVENUE FOCH / 10 AVENUE FOCH Sophora 135 1000 A 48.872046 2.275752
... ... ... ... ... ... ... ... ... ...
198859 DJS PARIS 19E ARRDT CENTRE SPORTIF JULES LADOUMEGUE / 35 ROUTE DES... Populus 20 500 J 48.890466 2.397443
198860 Jardin PARIS 14E ARRDT PARC MONTSOURIS Fagus 55 700 J 48.823919 2.337872
198861 Jardin PARIS 14E ARRDT PARC MONTSOURIS Taxus 55 500 JA 48.821099 2.338411
198862 Jardin PARIS 14E ARRDT PARC MONTSOURIS Taxus 75 500 JA 48.823552 2.337892
198865 DJS PARIS 13E ARRDT CENTRE SPORTIF GEORGES CARPENTIER / 81 BOULEVA... Acer 165 1100 A 48.819252 2.370641

129996 rows × 9 columns

In [105]:
# Box plot of the circumference column after the filtering applied earlier.
new_df.boxplot(column="circonference_cm")
Out[105]:
<AxesSubplot:>
In [106]:
# Box plot of the height column (scaled by 100 and renamed to hauteur_cm above).
new_df.boxplot(column="hauteur_cm")
Out[106]:
<AxesSubplot:>
In [107]:
# Same missingno bar chart as after loading, now on the cleaned frame, so the
# two charts can be compared.
msno.bar(new_df)
Out[107]:
<AxesSubplot:>
In [108]:
import seaborn as sns

# Each row is one tree: add a constant counter column so that tree counts can
# later be obtained by summing "n_tree" within a group. Assigning the scalar
# lets pandas broadcast it, instead of building a Python list of ones as long
# as the frame.
new_df["n_tree"] = 1
In [109]:
# Shorter display labels for the arrondissement values, used on plot axes and
# legends further down. Values not listed here are left unchanged by `replace`.
ARRONDISSEMENT_SHORT_LABELS = {
    "PARIS 10E ARRDT": "10E ARR", "PARIS 11E ARRDT": "11E ARR", "PARIS 12E ARRDT": "12E ARR",
    "PARIS 13E ARRDT": "13E ARR", "PARIS 14E ARRDT": "14E ARR", "PARIS 15E ARRDT": "15E ARR",
    "PARIS 16E ARRDT": "16E ARR", "PARIS 17E ARRDT": "17E ARR", "PARIS 18E ARRDT": "18E ARR",
    "PARIS 19E ARRDT": "19E ARR", "PARIS 1ER ARRDT": "1ER ARR", "PARIS 20E ARRDT": "20E ARR",
    "PARIS 2E ARRDT": "2E ARR", "PARIS 3E ARRDT": "3E ARR", "PARIS 4E ARRDT": "4E ARR",
    "PARIS 5E ARRDT": "5E ARR", "PARIS 6E ARRDT": "6E ARR", "PARIS 7E ARRDT": "7E ARR",
    "PARIS 8E ARRDT": "8E ARR", "PARIS 9E ARRDT": "9E ARR", "SEINE-SAINT-DENIS": "S.S.DENIS",
    "VAL-DE-MARNE": "V.MARNE", "BOIS DE BOULOGNE": "B. BOULOGNE", "BOIS DE VINCENNES": "B.VINCENNES",
    "HAUTS-DE-SEINE": "H.SEINE",
}

# Assign the result back to the column. Calling `.replace(..., inplace=True)`
# on `new_df["arrondissement"]` operates on a column selection; recent pandas
# versions warn about that pattern, and with Copy-on-Write enabled it leaves
# `new_df` unchanged.
new_df["arrondissement"] = new_df["arrondissement"].replace(ARRONDISSEMENT_SHORT_LABELS)
In [110]:
# Number of trees per arrondissement: a one-column frame ("n_tree") indexed by
# arrondissement.
# Only the counter column is selected before summing. Summing the whole frame
# and dropping the unwanted numeric columns afterwards relies on text columns
# being silently excluded from the sum, which is no longer the default in
# pandas 2.x.
new_df_2 = new_df.groupby("arrondissement")[["n_tree"]].sum()
In [111]:
# Reshape the per-arrondissement totals to long form (arrondissement, variable,
# value) so seaborn can colour the bars by the name of the aggregated column.
for_sns = pd.melt(
    new_df_2.reset_index(),
    id_vars=["arrondissement"],
    value_vars=new_df_2.columns,
)

# Horizontal bars: one per arrondissement, bar length = summed value.
p = sns.barplot(y="arrondissement", x="value", data=for_sns, hue="variable")

# Title spelling fixed ("arrondissement" instead of "arroundissement").
p.set_title(" Nombre d'arbres par arrondissement")
Out[111]:
Text(0.5, 1.0, " Nombre d'arbres par arroundissement")
In [120]:
# Work on an independent copy holding only the counter and the genus name.
genre_count_columns = ["n_tree", "genre"]
genre = new_df.loc[:, genre_count_columns]
df_map = genre.copy()

# Total number of trees per genre, with "genre" kept as a regular column.
n_df = df_map.groupby("genre", as_index=False).sum()

# One bar per genre, bar height = number of trees.
sns.barplot(x="genre", y="n_tree", data=n_df)
Out[120]:
<AxesSubplot:xlabel='genre', ylabel='n_tree'>
In [121]:
# Independent copy with the grouping key and the two size measurements.
size_by_arrondissement_columns = ["arrondissement", "circonference_cm", "hauteur_cm"]
selected_col = new_df.loc[:, size_by_arrondissement_columns]
new_df_3 = selected_col.copy()
In [122]:
# Mean circumference and mean height within each arrondissement
# (result is indexed by arrondissement).
trees_by_arrondissement = new_df_3.groupby("arrondissement")
ndf_3 = trees_by_arrondissement.mean()
In [123]:
# Turn the arrondissement index back into a regular column so it can be used
# as the `hue` variable when plotting.
for_sns_2 = ndf_3.reset_index(drop=False)
In [124]:
# One point per arrondissement: mean circumference on x, mean height on y.
g = sns.scatterplot(
    x="circonference_cm",
    y="hauteur_cm",
    hue="arrondissement",
    data=for_sns_2,
)
g.set_title("hauteur_cm et circonference_cm moyenne par arrondissement")
Out[124]:
Text(0.5, 1.0, 'hauteur_cm et circonference_cm moyenne par arrondissement')
In [125]:
# Independent copy with the development stage and the two size measurements.
size_by_stage_columns = ["stade_developpement", "hauteur_cm", "circonference_cm"]
selected_col_2 = new_df.loc[:, size_by_stage_columns]
new_df_4 = selected_col_2.copy()
In [126]:
# Mean height and mean circumference within each development stage
# (result is indexed by stade_developpement).
trees_by_stage = new_df_4.groupby("stade_developpement")
ndf_4 = trees_by_stage.mean()
In [127]:
import matplotlib.pyplot as plt

# Stage means with "stade_developpement" restored as a regular column.
for_sns_3 = ndf_4.reset_index()

# Both curves are drawn on the same explicitly created Axes. Each gets a label
# so the legend tells them apart, and the shared y-axis gets a neutral unit
# label instead of the name of only one of the two variables.
# NOTE(review): the x order is whatever order the stage codes come out of the
# groupby; confirm it matches the intended developmental order.
fig, ax = plt.subplots()
sns.lineplot(
    data=for_sns_3,
    x="stade_developpement",
    y="circonference_cm",
    label="circonference_cm",
    ax=ax,
)
sns.lineplot(
    data=for_sns_3,
    x="stade_developpement",
    y="hauteur_cm",
    label="hauteur_cm",
    ax=ax,
)
ax.set_ylabel("cm")
ax.legend()
ax.set_title("Hauteur_cm et circonference_cm moyenne par stade de developpement")
Out[127]:
Text(0.5, 1.0, 'Hauteur_cm et circonference_cm moyenne par stade de developpement')
In [128]:
# Independent copy with the two grouping keys and the tree counter.
stage_count_columns = ["stade_developpement", "n_tree", "arrondissement"]
selected_col_3 = new_df.loc[:, stage_count_columns]
new_df_5 = selected_col_3.copy()

# Number of trees for every (arrondissement, development stage) pair, with both
# keys kept as regular columns.
ndf_5 = new_df_5.groupby(
    ["arrondissement", "stade_developpement"], as_index=False
).sum()

# Stacked bars: one bar per arrondissement, segments weighted by the tree count
# of each development stage.
graph = sns.histplot(
    data=ndf_5,
    x="arrondissement",
    weights="n_tree",
    hue="stade_developpement",
    multiple="stack",
)
graph.set_title("Nombre d'arbres par arrondissement et stade de developpement")
Out[128]:
Text(0.5, 1.0, "Nombre d'arbres par arrondissement et stade de developpement")
In [129]:
import plotly.express as px

# Independent copy with the tree counter and the two hierarchy levels.
domain_count_columns = ["n_tree", "domanialite", "arrondissement"]
selected_col_4 = new_df.loc[:, domain_count_columns]
new_df_6 = selected_col_4.copy()

# Number of trees for every (domanialite, arrondissement) pair, with both keys
# kept as regular columns.
ndf_6 = new_df_6.groupby(["domanialite", "arrondissement"], as_index=False).sum()

# Treemap: outer rectangles are domanialite values, inner ones arrondissements,
# and each area is proportional to the tree count.
figure = px.treemap(
    ndf_6,
    path=["domanialite", "arrondissement"],
    values="n_tree",
)
figure.show()
In [130]:
import folium

# One row per arrondissement: the total number of trees plus the coordinates of
# the first tree (with non-missing coordinates) recorded there, which serves as
# the circle's position on the map.
# Everything is computed in a single aggregation keyed by arrondissement, so the
# coordinates cannot end up attached to the wrong arrondissement, which a
# positional join of two separately built frames does not guarantee.
ndf_7 = (
    new_df
    .groupby("arrondissement")
    .agg(
        n_tree=("n_tree", "sum"),
        geo_point_2d_a=("geo_point_2d_a", "first"),
        geo_point_2d_b=("geo_point_2d_b", "first"),
    )
    .reset_index()
)

# Named `tree_map` rather than `map` so the Python builtin `map` stays usable
# in the rest of the notebook.
# NOTE(review): recent folium releases may no longer ship the "Stamen Terrain"
# tileset -- confirm against the installed version.
tree_map = folium.Map(
    location=[48.856614, 2.3522219],
    zoom_start=14,
    control_scale=True,
    tiles="Stamen Terrain",
)

# Circle radius is the tree count divided by the number of rows in `ndf_7`.
radius_divisor = len(ndf_7)
for row in ndf_7.itertuples(index=False):
    folium.Circle(
        location=[row.geo_point_2d_a, row.geo_point_2d_b],
        # Readable "label: count" text instead of the repr of a Python tuple.
        tooltip=f"{row.arrondissement}: {row.n_tree}",
        radius=int(row.n_tree) / radius_divisor,
        fill=True,
    ).add_to(tree_map)

# Last expression of the cell: render the map.
tree_map
Out[130]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [131]:
# Independent copy with the coordinates and the two descriptive columns shown
# on hover.
tree_position_columns = ["arrondissement", "geo_point_2d_a", "geo_point_2d_b", "domanialite"]
selected_col_6 = new_df.loc[:, tree_position_columns]
new_df_8 = selected_col_6.copy()

# One marker per tree, coloured by arrondissement; geo_point_2d_a is used as
# latitude and geo_point_2d_b as longitude.
scatter_map = px.scatter_mapbox(
    new_df_8,
    lat="geo_point_2d_a",
    lon="geo_point_2d_b",
    hover_name="arrondissement",
    hover_data=["domanialite"],
    zoom=14,
    height=500,
    color="arrondissement",
)

# Use the OpenStreetMap base layer; as the last expression this also displays
# the figure.
scatter_map.update_layout(mapbox_style="open-street-map")
In [ ]:
 
In [ ]: